


import scrapy
from scrapy.http import Request
from xiaohua.items import XiaohuaItem


class XiaohuaSpider(scrapy.spiders.Spider):
    """Spider that scrapes joke texts from the xiaohua.com "duanzi" listing.

    Starting from ``start_urls``, each listing page is parsed for joke
    texts, one ``XiaohuaItem`` is yielded per joke, and the "下一页"
    (next page) link is followed until a page has no such link.
    """

    name = 'xiaohua'

    allowed_domains = ['xiaohua.com']
    start_urls = [
        "https://www.xiaohua.com/duanzi/"
    ]

    # Site root. Pagination links are now resolved against the response
    # URL, so this is no longer read by ``parse``; it is kept so existing
    # code that references ``XiaohuaSpider.page_url`` keeps working.
    page_url = 'https://www.xiaohua.com'

    def parse(self, response):
        """Yield one item per selected joke, then a request for the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            ``XiaohuaItem`` objects whose ``fonts`` field holds the joke
            text, followed by a ``Request`` for the next listing page when
            a "下一页" link is present.
        """
        self.logger.debug(response.request.headers.get('User-Agent'))

        # Select the <span> following <i class="collect"> whose numeric text
        # is greater than 10 (presumably a favourites counter -- confirm
        # against the site markup), climb three levels (span -> li -> ul ->
        # div.one-cont) and take that entry's p/a text.
        jokes = response.xpath(
            '''
            //div[@class="one-cont"]
            /ul[@class="discuss clearfix number"]
            /li/i[@class="collect"]/following-sibling::span[text()>10]
            /../../../p/a
            ''').xpath('string(.)').extract()
        for joke in jokes:
            # Build a fresh item per joke: re-yielding one shared, mutated
            # instance would let later jokes overwrite earlier ones that
            # pipelines may still be holding a reference to.
            item = XiaohuaItem()
            item['fonts'] = joke
            yield item

        # extract_first() returns None instead of raising IndexError when the
        # last page has no "next page" link.
        next_href = response.xpath('//a[text()="下一页"]/@href').extract_first()
        if next_href:
            # urljoin handles root-relative, relative and absolute hrefs.
            next_url = response.urljoin(next_href)
            self.logger.debug('nextpage url is %s', next_url)
            yield Request(next_url, callback=self.parse)

